Loading the packages needed for the assignment.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(bayesrules)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

Exercise 4.1

  1. Beta(1.8, 1.8) centers \(\pi\) on 0.5
  2. Beta(3,2) somewhat favors \(\pi\) > 0.5 since alpha is slightly larger than beta
  3. Beta(1,10) strongly favors \(\pi\) < 0.5
  4. Beta(1,3) somewhat favors \(\pi\) < 0.5
  5. Beta(17,2) strongly favors \(\pi\) > 0.5

Exercise 4.2

We can eliminate a and b because the prior is not symmetrical or centered around .5. The prior is right-tailed meaning beta > alpha so we can eliminate f. This leaves c,d, and e which have the same Beta distribution but different y and n.  The likelihood is symmetrical around pi of 0.50 so the correct choice is e where y is half of n.  Below I’ve plotted e to show it matches.

# Overlay the Beta(3, 8) prior, the Binomial likelihood (y = 2 of n = 4),
# and the resulting posterior to confirm choice (e).
plot_beta_binomial(alpha = 3, beta = 8, y = 2, n = 4)

Exercise 4.3

  1. Beta(3, 10) this gives a right-skewed distribution where pi is lower to indicate it’s really unlikely.
  2. Beta(1,1) this gives a uniform distribution where all pi values between 0 and 1 are equally likely. c)Beta(10,3) this gives a left-skewed distribution where pi is higher to indicate it’s really likely. d)Beta(5,4) gives a distribution that slightly favors pi > 0.50 to indicate Daryl being slightly unsure. e)Beta(4,5) gives a distribution that slightly favors p < 0.50 to indicate Scott being slightly unsure.

Exercise 4.4

Kimya’s prior indicates that she thinks there’s a chance that the shop is closed but she is unsure.

# Kimya's prior: Beta(1, 2), gently favoring smaller values of pi.
plot_beta(alpha = 1, beta = 2)

Fernando’s prior. Fernando thinks that it is more likely the shop is closed, but is unsure. He is more sure it’s closed than Kimya.

# Fernando's prior: Beta(0.5, 1), piling mass near pi = 0.
# Arguments named for consistency with the other plot_beta() calls in this file.
plot_beta(alpha = 0.5, beta = 1)

Ciara’s prior. Ciara is very sure the shop is not open.

# Ciara's prior: Beta(3, 10), strongly favoring small pi.
plot_beta(alpha = 3, beta = 10)

Taylor’s prior. Taylor is sure the shop is open.

# Taylor's prior: Beta(2, 0.10), piling mass near pi = 1.
plot_beta(alpha = 2, beta = 0.10)

Exercise 4.5: Simulation of posterior

#Kimya's simulation 
set.seed(1000)
kimya_sim <- data.frame(pi = rbeta(10000, 1, 2)) |> #simulate 10000 values of pi with beta(1,2)
  mutate(y = rbinom(10000, size = 7, prob = pi)) #size 7 indicates 7 "trials"
# Keep only the simulated pairs that match our data
kimya_posterior <- kimya_sim |>  
  filter(y == 3)
# Plot the remaining pi values
ggplot(kimya_posterior, aes(x = pi)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

kimya_posterior |> 
  summarize(mean(pi), sd(pi)) #estimating posterior mean and sd
##   mean(pi)    sd(pi)
## 1 0.400521 0.1500653
#Fernando's simulation: same rejection-sampling scheme with his Beta(0.5, 1) prior
set.seed(2000)
fernando_sim <- data.frame(pi = rbeta(10000, 0.50, 1)) |>  
  mutate(y = rbinom(10000, size = 7, prob = pi))
fernando_posterior <- fernando_sim |>  
  filter(y == 3)
# Plot the remaining pi values
ggplot(fernando_posterior, aes(x = pi)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

fernando_posterior |> 
  summarize(mean(pi), sd(pi))
##    mean(pi)    sd(pi)
## 1 0.4126212 0.1615311
#Ciara's simulation, using her Beta(3, 10) prior
set.seed(3000)
ciara_sim <- data.frame(pi = rbeta(10000, 3, 10)) |>  
  mutate(y = rbinom(10000, size = 7, prob = pi))
ciara_posterior <- ciara_sim |>  
  filter(y == 3)
# Plot the remaining pi values
ggplot(ciara_posterior, aes(x = pi)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ciara_posterior |> 
  summarize(mean(pi), sd(pi))
##    mean(pi)     sd(pi)
## 1 0.2963917 0.09966824
#Taylor's simulation 
# NOTE(review): unlike the three simulations above, there is no set.seed() call
# here, so Taylor's printed results are not reproducible across runs.
taylor_sim <- data.frame(pi = rbeta(10000, 2, 0.10)) |>  
  mutate(y = rbinom(10000, size = 7, prob = pi))
taylor_posterior <- taylor_sim |>  
  filter(y == 3)
# Plot the  pi values that match data
ggplot(taylor_posterior, aes(x = pi)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

taylor_posterior |> 
  summarize(mean(pi), sd(pi))
##    mean(pi)    sd(pi)
## 1 0.5296116 0.1580043

Exercise 4.6: Exact posterior

We are given y = 3 and n = 7 and we will use these values to find the posterior for each of the above models.

The plots below show the posterior model and the mean of each posterior in purple. Summarize_beta_binomial shows the mean numerically.

# Exact conjugate posteriors for each prior, given y = 3 open days in n = 7.
# Posterior is Beta(alpha + y, beta + n - y); the vertical line marks its mean.
#Kimya's model
plot_beta_binomial(alpha = 1, beta = 2, y = 3, n = 7) + geom_vline(xintercept = .40, color = "purple") 

summarize_beta_binomial(alpha = 1, beta = 2, y = 3, n = 7)
##       model alpha beta      mean  mode        var        sd
## 1     prior     1    2 0.3333333 0.000 0.05555556 0.2357023
## 2 posterior     4    6 0.4000000 0.375 0.02181818 0.1477098
#Fernando's model
plot_beta_binomial(alpha = 0.50, beta = 1, y = 3, n = 7) + geom_vline(xintercept = 0.4117647, color = "purple")

summarize_beta_binomial(alpha = 0.50, beta = 1, y = 3, n = 7)
##       model alpha beta      mean      mode        var        sd
## 1     prior   0.5    1 0.3333333 1.0000000 0.08888889 0.2981424
## 2 posterior   3.5    5 0.4117647 0.3846154 0.02549627 0.1596755
#Ciara's model
plot_beta_binomial(alpha = 3, beta = 10, y = 3, n = 7) + geom_vline(xintercept = 0.3000, color = "purple")

summarize_beta_binomial(alpha = 3, beta = 10, y = 3, n = 7)
##       model alpha beta      mean      mode        var        sd
## 1     prior     3   10 0.2307692 0.1818182 0.01267963 0.1126039
## 2 posterior     6   14 0.3000000 0.2777778 0.01000000 0.1000000
#Taylor's model
plot_beta_binomial(alpha = 2, beta = 0.10, y = 3, n = 7) + geom_vline(xintercept = 0.5494505, color = "purple")

summarize_beta_binomial(alpha = 2, beta = 0.10, y = 3, n = 7)
##       model alpha beta      mean      mode        var        sd
## 1     prior     2  0.1 0.9523810 1.0000000 0.01462951 0.1209525
## 2 posterior     5  4.1 0.5494505 0.5633803 0.02451036 0.1565579

Comparing the simulations to the actual posteriors shows that the actual posteriors are very close to the simulated posteriors; they are all within 1 standard deviation of the estimate.

4.7

  1. Alpha and beta values are low (meaning high variance) and right skewed; data shows a left skew (more successes than failure) and are greater values so data has more influence on the posterior.
  2. Alpha is high indicating there’ll be a sharp peak (less variance) and there is a low amount of data so prior has more influence on posterior.
  3. Posterior is a compromise between prior and data.
  4. Posterior is a compromise between prior and data given that beta indicates success is more unlikely and y and n indicate success is likely and both these sets of values are low.
  5. Data has more influence on the posterior given n = 200, a high number of trials which increases certainty.

4.8 Plot models given in 4.7

#a
plot_beta_binomial(alpha = 1, beta = 4, y = 8 , n = 10) #prior is to the left of the likelihood and posterior. Y and n indicate the likelihood will be centered around .8

#b
plot_beta_binomial(alpha = 20, beta = 3, y = 0, n = 1) #Prior and posterior are to the right of the likelihood, which will be in the shape of a line given y and n. 

#c
plot_beta_binomial(alpha = 4, beta = 2, y = 1, n = 3) #Prior is slightly left-skewed given alpha > beta. 

#d
plot_beta_binomial(alpha = 4, beta = 2, y = 10, n = 13) #Likelihood is slightly left-skewed and so is the prior. Both are centered around 0.75.

#e
plot_beta_binomial(alpha = 20, beta = 2, y = 10, n=200) #Likelihood will have a sharp peak because of high n and is centered below 0.25. Prior will be left-skewed and is centered above 0.75.

4.9

  1. Prior understanding represented by Beta(7,2) shows that plausible values of pi are around 0.77 (mean). The quantile function shows that 50% of pi values will be between 0.7000 and 0.885.
# Part (a): Beta(7, 2) prior with its mean and mode marked.
plot_beta(7,2, mean = TRUE, mode = TRUE)

summarize_beta(7,2)
##        mean      mode        var        sd
## 1 0.7777778 0.8571429 0.01728395 0.1314684
# NOTE(review): no set.seed() before rbeta(), so these sample quantiles change
# on every run; the printed values below are one particular draw.
quantile(rbeta(1000, 7,2))
##        0%       25%       50%       75%      100% 
## 0.3248465 0.7007416 0.8001489 0.8850535 0.9986264
  1. Below is a plot with survey data of y = 19 and n = 20.
# Part (b): posterior Beta(26, 3) after observing y = 19 of n = 20.
plot_beta_binomial(alpha = 7, beta = 2, y = 19, n = 20)

summarize_beta_binomial(alpha = 7, beta = 2, y = 19, n = 20)
##       model alpha beta      mean      mode         var         sd
## 1     prior     7    2 0.7777778 0.8571429 0.017283951 0.13146844
## 2 posterior    26    3 0.8965517 0.9259259 0.003091558 0.05560178

With this data, my understanding of pi changed by increasing to 0.896 from 0.777. My certainty of pi has also increased since the likelihood will be narrower based on the data.

  1. If less people preferred dogs in the data then my mean would decrease.
# Part (c): y = 1 of n = 20 pulls the posterior down to Beta(8, 21).
plot_beta_binomial(alpha = 7, beta = 2, y = 1, n = 20)

summarize_beta_binomial(alpha = 7, beta = 2, y = 1, n = 20)
##       model alpha beta      mean      mode        var        sd
## 1     prior     7    2 0.7777778 0.8571429 0.01728395 0.1314684
## 2 posterior     8   21 0.2758621 0.2592593 0.00665874 0.0816011

The posterior mean decreased to 0.275.

  1. If people equally prefer cats and dogs, my posterior mean would decrease, but not in the same magnitude as when much fewer people prefer dogs as in c. 
# Part (d): a 50/50 split (y = 10 of n = 20) gives posterior Beta(17, 12).
plot_beta_binomial(alpha = 7, beta = 2, y = 10, n = 20)

summarize_beta_binomial(alpha = 7, beta = 2, y = 10, n = 20)
##       model alpha beta      mean      mode         var         sd
## 1     prior     7    2 0.7777778 0.8571429 0.017283951 0.13146844
## 2 posterior    17   12 0.5862069 0.5925926 0.008085612 0.08992003

Exercise 4.10

  1. In this exercise we’ll use the given prior and pi|(Y=y)~Beta(alpha + y, B+ n- y) to find n and y.

We know alpha + y = 8.5 so we’ll solve for y, which is y = 8.5 - alpha = 8. For n we know .5 + n - y = 2.5, or .5 + n - 8 = 2.5, so n = 10

# Part (a): Beta(0.5, 0.5) prior with y = 8 of n = 10 gives posterior Beta(8.5, 2.5).
summarize_beta_binomial(alpha = .5, beta = .5, y = 8, n = 10)
##       model alpha beta      mean              mode        var        sd
## 1     prior   0.5  0.5 0.5000000           0 and 1 0.12500000 0.3535534
## 2 posterior   8.5  2.5 0.7727273 0.833333333333333 0.01463499 0.1209751
plot_beta_binomial(alpha = .5, beta = .5, y = 8, n = 10)

We will use the above process to find the rest of y and n using vectors.

#finding y for b-f
# Since the posterior alpha is alpha + y, the observed number of successes is
# y = posterior alpha - alpha. (Assignments use <- for consistency with the
# rest of the file; the original used = here.)
alpha <- c(0.5, 10, 8, 2, 1)
posta <- c(3.5, 12, 15, 5, 30)

y <- posta - alpha

#finding n for b-f 
# Since the posterior beta is beta + n - y, the sample size is
# n = posterior beta - beta + y.
beta <- c(0.5, 1, 3, 2, 1)
postb <- c(10.5, 15, 6, 5, 3)
n <- postb - beta + y

Next I’ll plot these models.

# Plot and summarize parts b-f using the y and n recovered above.
#plotting beta binomial 
#b
plot_beta_binomial(alpha = .5, beta = .5, y = 3 , n = 13)

summarize_beta_binomial(alpha = .5, beta = .5, y = 3 , n = 13)
##       model alpha beta mean              mode    var        sd
## 1     prior   0.5  0.5 0.50           0 and 1 0.1250 0.3535534
## 2 posterior   3.5 10.5 0.25 0.208333333333333 0.0125 0.1118034
#c
plot_beta_binomial(alpha = 10, beta = 1, y = 2, n = 16)

summarize_beta_binomial(alpha = 10, beta = 1, y = 2, n = 16)
##       model alpha beta      mean mode         var         sd
## 1     prior    10    1 0.9090909 1.00 0.006887052 0.08298827
## 2 posterior    12   15 0.4444444 0.44 0.008818342 0.09390603
#d
plot_beta_binomial(alpha = 8, beta = 3, y = 7, n = 10)

summarize_beta_binomial(alpha = 8, beta = 3, y = 7, n = 10)
##       model alpha beta      mean      mode         var         sd
## 1     prior     8    3 0.7272727 0.7777778 0.016528926 0.12856487
## 2 posterior    15    6 0.7142857 0.7368421 0.009276438 0.09631427
#e
plot_beta_binomial(alpha = 2, beta = 2, y = 3, n = 6)

summarize_beta_binomial(alpha = 2, beta = 2, y = 3, n = 6)
##       model alpha beta mean mode        var        sd
## 1     prior     2    2  0.5  0.5 0.05000000 0.2236068
## 2 posterior     5    5  0.5  0.5 0.02272727 0.1507557
#f
plot_beta_binomial(alpha = 1, beta = 1, y = 29, n = 31)

summarize_beta_binomial(alpha = 1, beta = 1, y = 29, n = 31)
##       model alpha beta      mean      mode         var         sd
## 1     prior     1    1 0.5000000       NaN 0.083333333 0.28867513
## 2 posterior    30    3 0.9090909 0.9354839 0.002430724 0.04930238

4.11

# Exercise 4.11: flat Beta(1, 1) prior, so each posterior is Beta(1 + y, 1 + n - y)
# and is driven entirely by the data.
#a
summarize_beta_binomial(alpha = 1, beta = 1, y = 10, n = 13)
##       model alpha beta      mean      mode        var        sd
## 1     prior     1    1 0.5000000       NaN 0.08333333 0.2886751
## 2 posterior    11    4 0.7333333 0.7692308 0.01222222 0.1105542
plot_beta_binomial(alpha = 1, beta = 1, y = 10, n = 13) #posterior is Beta(11,4)

#b
summarize_beta_binomial(alpha = 1, beta = 1, y = 0, n = 1)
##       model alpha beta      mean mode        var        sd
## 1     prior     1    1 0.5000000  NaN 0.08333333 0.2886751
## 2 posterior     1    2 0.3333333    0 0.05555556 0.2357023
plot_beta_binomial(alpha = 1, beta = 1, y = 0, n = 1) #posterior is Beta(1,2)

#c
summarize_beta_binomial(alpha = 1, beta = 1, y = 100, n = 130)
##       model alpha beta      mean      mode         var         sd
## 1     prior     1    1 0.5000000       NaN 0.083333333 0.28867513
## 2 posterior   101   31 0.7651515 0.7692308 0.001351088 0.03675715
plot_beta_binomial(alpha = 1, beta = 1, y = 100, n = 130) #posterior is Beta(101, 31)

#d
summarize_beta_binomial(alpha = 1, beta = 1, y = 20, n = 120)
##       model alpha beta      mean      mode         var         sd
## 1     prior     1    1 0.5000000       NaN 0.083333333 0.28867513
## 2 posterior    21  101 0.1721311 0.1666667 0.001158553 0.03403752
plot_beta_binomial(alpha = 1, beta = 1, y = 20, n = 120) #posterior is Beta(21, 101)

#e
summarize_beta_binomial(alpha = 1, beta = 1, y = 234, n = 468)
##       model alpha beta mean mode          var         sd
## 1     prior     1    1  0.5  NaN 0.0833333333 0.28867513
## 2 posterior   235  235  0.5  0.5 0.0005307856 0.02303878
plot_beta_binomial(alpha = 1, beta = 1, y = 234, n = 468) #posterior is Beta(235,235)

Exercise 4.12 Repeating 4.11 with prior pi~Beta(10,2)

# Exercise 4.12: same data as 4.11 but with an informative Beta(10, 2) prior,
# so each posterior is Beta(10 + y, 2 + n - y).
#a
summarize_beta_binomial(alpha = 10, beta = 2, y = 10, n = 13)
##       model alpha beta      mean     mode         var         sd
## 1     prior    10    2 0.8333333 0.900000 0.010683761 0.10336228
## 2 posterior    20    5 0.8000000 0.826087 0.006153846 0.07844645
plot_beta_binomial(alpha = 10, beta = 2, y = 10, n = 13) #posterior is Beta(20,5)

#b
summarize_beta_binomial(alpha = 10, beta = 2, y = 0, n = 1)
##       model alpha beta      mean      mode        var        sd
## 1     prior    10    2 0.8333333 0.9000000 0.01068376 0.1033623
## 2 posterior    10    3 0.7692308 0.8181818 0.01267963 0.1126039
plot_beta_binomial(alpha = 10, beta = 2, y = 0, n = 1) #posterior is Beta(10,3)

#c
summarize_beta_binomial(alpha = 10, beta = 2, y = 100, n = 130)
##       model alpha beta      mean      mode         var         sd
## 1     prior    10    2 0.8333333 0.9000000 0.010683761 0.10336228
## 2 posterior   110   32 0.7746479 0.7785714 0.001220759 0.03493936
plot_beta_binomial(alpha = 10, beta = 2, y = 100, n = 130) #posterior is Beta(110, 32)

#d
summarize_beta_binomial(alpha = 10, beta = 2, y = 20, n = 120)
##       model alpha beta      mean      mode        var        sd
## 1     prior    10    2 0.8333333 0.9000000 0.01068376 0.1033623
## 2 posterior    30  102 0.2272727 0.2230769 0.00132045 0.0363380
plot_beta_binomial(alpha = 10, beta = 2, y = 20, n = 120) #posterior is Beta(30, 102)

#e
summarize_beta_binomial(alpha = 10, beta = 2, y = 234, n = 468)
##       model alpha beta      mean      mode          var         sd
## 1     prior    10    2 0.8333333 0.9000000 0.0106837607 0.10336228
## 2 posterior   244  236 0.5083333 0.5083682 0.0005196061 0.02279487
plot_beta_binomial(alpha = 10, beta = 2, y = 234, n = 468) #posterior is Beta(244,236)

Exercise 4.13

  1. The politician’s prior understanding can be represented by Unif(0.5,1).
#plotting uniform distribution with code modified from https://www.statology.org/plot-uniform-distribution-in-r/

# Define the x-axis grid. Use the full argument name `length.out` rather than
# the partially matched `length` to avoid partial argument matching.
x <- seq(0.0, 1.5, length.out = 50)

# Uniform(0.5, 1) density at each grid point.
y <- dunif(x, min = 0.50, max = 1)

# Draw the density as a line plot.
plot(x, y, type = "l")

  1. The politician thinks that his approval rating being any value between 50% and 100% is equally likely.

  2. construct a formula and sketch posterior understanding if y = 0 and n = 100. We’ll use pi|(Y=y)~Beta(alpha + y, B+ n- y)

posterior alpha = 0.5 + 0, posterior beta = 1 + 100 - 0 so posterior is Beta(0.50, 101)

# NOTE(review): the Unif(0.5, 1) prior is not a Beta distribution; treating it
# as Beta(0.5, 1) ignores that the prior puts zero mass below pi = 0.5, so this
# "posterior" (mean ~0.005) lies entirely outside the prior's support — confirm
# the intended approximation.
plot_beta(0.50, 101)

summarize_beta(0.50, 101)
##          mean mode          var          sd
## 1 0.004926108    0 4.782285e-05 0.006915407
  1. The posterior model above shows a mean that approaches 0. The exact posterior mean is 0.004926108 and we can see in the model that all f(pi) between 0.50 < pi < 1 are close to this mean. This is telling us that, based on the data and our limitation of the posterior being on the same scale as the prior, his support is very low, but we don’t see much variation. The politician’s prior is too limited on its x-axis to account for all possible values of his approval rating. A uniform prior of Beta(1,1) would even have given a more useful posterior than the current one. Ideally, the prior would allow for a larger x-axis to allow for more possibilities even if it favors the politician having high ratings. The difference the priors of Beta(1,1) and Beta(20,10) make for the posterior plot is shown below.
plot_beta_binomial(alpha = 1, beta = 1, y = 0, n = 100) #uniform distribution that allows for all values 0 < pi < 1

plot_beta_binomial(alpha = 20, beta = 10, y = 0, n = 100) #beta prior that shows support for politician may be high

Exercise 4.15

# NOTE(review): each step below reuses the UPDATED prior while also passing the
# CUMULATIVE y and n, so earlier trials are counted twice. Sequential updating
# should feed only the new trial at each step (e.g. step b: prior Beta(3, 3)
# with y = 1, n = 1, giving Beta(4, 3)). For 3 successes in 4 trials the final
# posterior should be Beta(2 + 3, 3 + 4 - 3) = Beta(5, 4), not Beta(10, 5).
#a
plot_beta_binomial(alpha = 2, beta = 3, y = 1, n = 1) #Updating after first observation is a success 

summarize_beta_binomial(alpha = 2, beta = 3, y = 1, n = 1) #posterior Beta(3,3)
##       model alpha beta mean      mode        var        sd
## 1     prior     2    3  0.4 0.3333333 0.04000000 0.2000000
## 2 posterior     3    3  0.5 0.5000000 0.03571429 0.1889822
#b
plot_beta_binomial(alpha = 3, beta = 3, y = 2, n = 2) #updating after second success and using first posterior as updated prior 

summarize_beta_binomial(alpha = 3, beta = 3, y =2, n = 2) #Posterior is Beta(5,3)
##       model alpha beta  mean      mode        var        sd
## 1     prior     3    3 0.500 0.5000000 0.03571429 0.1889822
## 2 posterior     5    3 0.625 0.6666667 0.02604167 0.1613743
#c
plot_beta_binomial(alpha = 5, beta = 3, y = 2, n = 3) #update after failure and with a new prior 

summarize_beta_binomial(alpha = 5, beta = 3, y = 2, n = 3)#posterior is Beta(7,4)
##       model alpha beta      mean      mode        var        sd
## 1     prior     5    3 0.6250000 0.6666667 0.02604167 0.1613743
## 2 posterior     7    4 0.6363636 0.6666667 0.01928375 0.1388659
#d
plot_beta_binomial(alpha = 7, beta = 4, y = 3, n = 4) #update after 4th trial is success

summarize_beta_binomial(alpha = 7, beta = 4, y = 3, n = 4) #final posterior is Beta(10, 5)
##       model alpha beta      mean      mode        var        sd
## 1     prior     7    4 0.6363636 0.6666667 0.01928375 0.1388659
## 2 posterior    10    5 0.6666667 0.6923077 0.01388889 0.1178511

Exercise 4.16

# NOTE(review): as in 4.15, each step reuses the updated prior AND the
# cumulative y, n, double-counting earlier observations. Using only each
# round's new data, the final posterior for 7 successes in 20 trials is
# Beta(2 + 7, 3 + 20 - 7) = Beta(9, 16), not Beta(16, 29). Also note step d's
# prior Beta(9, 16) does not match step c's printed posterior Beta(14, 21).
#a
plot_beta_binomial(alpha = 2, beta = 3, y = 3, n = 5) #Updating after first observation is 3 out 5 successes

summarize_beta_binomial(alpha = 2, beta = 3, y = 3, n = 5) #posterior is Beta(5,5)
##       model alpha beta mean      mode        var        sd
## 1     prior     2    3  0.4 0.3333333 0.04000000 0.2000000
## 2 posterior     5    5  0.5 0.5000000 0.02272727 0.1507557
#b
plot_beta_binomial(alpha = 5, beta = 5, y = 4, n = 10) #updating after second set of observations is 1 out of 5 for a total of 4 out 10 successes

summarize_beta_binomial(alpha = 5, beta = 5, y = 4, n = 10) #posterior is Beta(9,11)
##       model alpha beta mean      mode        var        sd
## 1     prior     5    5 0.50 0.5000000 0.02272727 0.1507557
## 2 posterior     9   11 0.45 0.4444444 0.01178571 0.1085620
#c
plot_beta_binomial(alpha = 9, beta = 11, y = 5, n = 15) #update after 3rd round of observations with 1 success out of 5 for a total of 5 out of 15 successes

summarize_beta_binomial(alpha = 9, beta = 11, y = 5, n = 15) #posterior is Beta(14,21), per the output below
##       model alpha beta mean      mode         var         sd
## 1     prior     9   11 0.45 0.4444444 0.011785714 0.10856203
## 2 posterior    14   21 0.40 0.3939394 0.006666667 0.08164966
#d
plot_beta_binomial(alpha = 9, beta = 16, y = 7, n = 20) #update after 4th trial has 2 out 5 successes for a total of 7 out of 20 successes 

summarize_beta_binomial(alpha = 9, beta = 16, y = 7, n = 20) #final posterior is Beta(16, 29)
##       model alpha beta      mean      mode         var         sd
## 1     prior     9   16 0.3600000 0.3478261 0.008861538 0.09413574
## 2 posterior    16   29 0.3555556 0.3488372 0.004981213 0.07057771

4.17

  1. Employee’s prior pdfs.
plot_beta(4,3, mean = TRUE, mode = TRUE)

quantile(rbeta(1000, 4,3))
##         0%        25%        50%        75%       100% 
## 0.06857635 0.45418148 0.58287898 0.70782597 0.98727103
summarize_beta(4,3)
##        mean mode        var        sd
## 1 0.5714286  0.6 0.03061224 0.1749636
#The employees' prior understanding shows they think it's more likely that visitors will click on the site (there's a left-skew). The quantile function shows that f(pi) is centered around 0.577; the mean is 0.5714286. 
  1. Using the given prior, Beta(4,3), and pi|(Y=y)~Beta(alpha + y, B + n - y) we can find the posteriors "from scratch". For example, the first employee, with y = 0 and n = 1, will have posterior alpha of 4 + 0 = 4 and posterior beta of 3 + 1 - 0 = 4 so a posterior of Beta(4,4). plot_beta_binomial() confirms this. We can automate a bit using vectors.
# Shared Beta(4, 3) prior for every employee.
a <- 4
b <- 3

# Observed clicks (successes) and sample sizes for employees 1-3.
yclicks <- c(0, 3, 20)
nsamp <- c(1, 10, 100) # employees' sample sizes

# Conjugate Beta-Binomial update: posterior is Beta(a + y, b + n - y).
postalpha <- yclicks + a
postbeta <- (nsamp - yclicks) + b

The posterior models are Beta(4,4), Beta(7,10), and Beta(24,83).

c&d)

# Plot and summarize each employee's posterior given their own data.
#employee 1
plot_beta_binomial(alpha = 4, beta = 3, y = 0, n = 1)

summarize_beta_binomial(alpha = 4, beta = 3, y = 0, n = 1)
##       model alpha beta      mean mode        var        sd
## 1     prior     4    3 0.5714286  0.6 0.03061224 0.1749636
## 2 posterior     4    4 0.5000000  0.5 0.02777778 0.1666667
#employee 2
plot_beta_binomial(alpha = 4, beta = 3, y = 3, n = 10)

summarize_beta_binomial(alpha = 4, beta = 3, y = 3, n = 10)
##       model alpha beta      mean mode        var        sd
## 1     prior     4    3 0.5714286  0.6 0.03061224 0.1749636
## 2 posterior     7   10 0.4117647  0.4 0.01345636 0.1160016
#employee 3 
plot_beta_binomial(alpha = 4, beta = 3, y = 20, n = 100)

summarize_beta_binomial(alpha = 4, beta = 3, y = 20, n = 100)
##       model alpha beta      mean      mode         var         sd
## 1     prior     4    3 0.5714286 0.6000000 0.030612245 0.17496355
## 2 posterior    24   83 0.2242991 0.2190476 0.001611009 0.04013738

The first employee had the worst dataset among the three so the prior really influenced the posterior. The second employee has the most balance between his prior and data among the three employees. The final employee has the strength of a dataset with high n so it has a posterior with the least variance among the three and that is heavily influenced by the data. I would rely on the third employee’s model to make decisions about pulling or keeping the ad.

Exercise 4.18

  1. The posterior at the end of the first day will be the Beta(4,4) that we found in the last problem.

Day 2 will see the addition of 10 data points and 3 successes for a total of y2 = 3 n2 = 11. Using this and pi|(Y=y)~Beta(alpha + y, B + n - y) and our updated prior will give us day 2 posterior of Beta(7,12).

Day 3 will have Beta(7,12) as our new prior. We add 100 data points and 20 successes for y3 = 23 and n3 = 111. alpha + y3 = 30; beta + n3 - y3 = 100 for Beta(30, 100).

# NOTE(review): days 2 and 3 below pass CUMULATIVE y and n together with the
# already-updated prior, so earlier observations are counted twice. Using only
# each day's NEW data (day 2: y = 3, n = 10; day 3: y = 20, n = 100) gives
# Beta(7, 11) after day 2 and Beta(27, 91) after day 3 — identical to the
# one-shot analysis in part b.
#day 1
plot_beta_binomial(alpha = 4, beta = 3, y = 0, n = 1)

summarize_beta_binomial(alpha = 4, beta = 3, y = 0, n = 1) #posterior is Beta(4,4)
##       model alpha beta      mean mode        var        sd
## 1     prior     4    3 0.5714286  0.6 0.03061224 0.1749636
## 2 posterior     4    4 0.5000000  0.5 0.02777778 0.1666667
#day 2
plot_beta_binomial(alpha = 4, beta = 4, y = 3, n = 11)

summarize_beta_binomial(alpha = 4, beta = 4, y = 3, n = 11) #posterior is Beta(7,12)
##       model alpha beta      mean      mode        var        sd
## 1     prior     4    4 0.5000000 0.5000000 0.02777778 0.1666667
## 2 posterior     7   12 0.3684211 0.3529412 0.01163435 0.1078626
#day 3
plot_beta_binomial(alpha = 7, beta = 12, y = 23, n = 111)

summarize_beta_binomial(alpha = 7, beta = 12, y = 23, n = 111) #posterior is Beta(30, 100)
##       model alpha beta      mean      mode         var         sd
## 1     prior     7   12 0.3684211 0.3529412 0.011634349 0.10786264
## 2 posterior    30  100 0.2307692 0.2265625 0.001355075 0.03681134
#As they collected more data the fourth employee reduced variance in his posterior model. The fourth employee can be more certain of his pi. 
  1. If I understand this question, suppose the fourth employee started with Beta(4,3) and went straight to y = 23 and n = 111 instead of going sequentially; the plot and summary would look like the below.
# One-shot analysis: original Beta(4, 3) prior with all the data at once.
plot_beta_binomial(alpha = 4, beta = 3, y = 23, n = 111)

summarize_beta_binomial(alpha = 4, beta = 3, y = 23, n = 111) #posterior is Beta (27,91).
##       model alpha beta      mean      mode        var         sd
## 1     prior     4    3 0.5714286 0.6000000 0.03061224 0.17496355
## 2 posterior    27   91 0.2288136 0.2241379 0.00148284 0.03850766
#Comparing this to Beta(30, 100) from the sequential analysis, we notice that variance is slightly higher in the non-sequential analysis (0.00148284 versus 0.001355075) and that the means have a small difference.
# NOTE(review): done correctly, sequential and one-shot updating give IDENTICAL
# posteriors. The Beta(30, 100) sequential result differs from Beta(27, 91)
# only because earlier days' data were counted twice (cumulative y, n combined
# with already-updated priors), so the conclusion that sequential updating
# itself adds certainty does not hold.

Exercise 4.19

Import bechdel data.

# Load the bechdel data frame shipped with the bayesrules package.
data(bechdel, package = "bayesrules")
  1. John analyzes movies from 1980.
# Tally 1980 films by Bechdel result: 4 of 14 pass.
bechdel |>  
  filter(year == 1980) |>  
  tabyl(binary) |> 
  adorn_totals("row")
##  binary  n   percent
##    FAIL 10 0.7142857
##    PASS  4 0.2857143
##   Total 14 1.0000000

His first day’s posterior can be modeled and summarized with the below.

# Day 1: flat Beta(1, 1) prior with the 1980 data (y = 4 passes of n = 14).
plot_beta_binomial(alpha = 1, beta = 1, y = 4, n = 14)

summarize_beta_binomial(alpha = 1, beta = 1, y = 4, n = 14) #Posterior is Beta(5,11)
##       model alpha beta   mean      mode        var        sd
## 1     prior     1    1 0.5000       NaN 0.08333333 0.2886751
## 2 posterior     5   11 0.3125 0.2857143 0.01263787 0.1124183
  1. Day 2, John analyzes movies from the 1990s with updated prior from day 1.
# Tally 1990 films by Bechdel result: 6 of 15 pass.
bechdel |>  
  filter(year == 1990) |>  
  tabyl(binary) |> 
  adorn_totals("row")
##  binary  n percent
##    FAIL  9     0.6
##    PASS  6     0.4
##   Total 15     1.0

Day 2 model.

# NOTE(review): y = 10, n = 29 re-counts the 1980 films that are already baked
# into the Beta(5, 11) prior. The update should use only the NEW 1990 data
# (y = 6, n = 15), which gives posterior Beta(11, 20) instead of Beta(15, 30).
plot_beta_binomial(alpha = 5, beta = 11, y = 10, n = 29) #total passes so far equals 6 + 4 and updated n size is 14 + 15

summarize_beta_binomial(alpha = 5, beta = 11, y = 10, n = 29) #Posterior is Beta(15,30)
##       model alpha beta      mean      mode         var        sd
## 1     prior     5   11 0.3125000 0.2857143 0.012637868 0.1124183
## 2 posterior    15   30 0.3333333 0.3255814 0.004830918 0.0695048
  1. Day 3 model.
# Tally 2000 films by Bechdel result: 29 of 63 pass.
bechdel |>  
  filter(year == 2000) |>  
  tabyl(binary) |> 
  adorn_totals("row")
##  binary  n   percent
##    FAIL 34 0.5396825
##    PASS 29 0.4603175
##   Total 63 1.0000000

Plot and summary with updated y and n and new prior from day 2.

# NOTE(review): y = 39, n = 92 re-counts the 1980 and 1990 films already
# encoded in the Beta(15, 30) prior. Updating with only each day's NEW data
# throughout would end at Beta(40, 54) = Beta(1 + 39, 1 + 92 - 39), exactly
# matching Jenna's one-shot analysis below, rather than Beta(54, 83).
plot_beta_binomial(alpha = 15, beta = 30, y = 39, n = 92) #total passes so far equals 6 + 4 + 29 and total n = 63 + 15 + 14

summarize_beta_binomial(alpha = 15, beta = 30, y = 39, n = 92) #Posterior is Beta(54,83)
##       model alpha beta      mean      mode         var         sd
## 1     prior    15   30 0.3333333 0.3255814 0.004830918 0.06950480
## 2 posterior    54   83 0.3941606 0.3925926 0.001730420 0.04159832
  1. Jenna’s non-sequential method.
# Jenna's one-shot analysis: pool all three years, then update the flat prior once.
# NOTE(review): year is compared with filter(year == 1980) elsewhere, so it is
# presumably numeric; the quoted values here work only via coercion — prefer
# c(1980, 1990, 2000).
bechdel |>  
  filter(year %in% c("1980", "1990", "2000")) |>  #filtered by three years of relevance with code adapted from Moderndrive 
  tabyl(binary) |> 
  adorn_totals("row")
##  binary  n  percent
##    FAIL 53 0.576087
##    PASS 39 0.423913
##   Total 92 1.000000
plot_beta_binomial(alpha = 1, beta = 1, y = 39, n = 92)

summarize_beta_binomial(alpha = 1, beta = 1, y = 39, n = 92) #Posterior is Beta(40, 54)
##       model alpha beta      mean     mode         var         sd
## 1     prior     1    1 0.5000000      NaN 0.083333333 0.28867513
## 2 posterior    40   54 0.4255319 0.423913 0.002573205 0.05072677

Exercise 4.20

Frequentists often begin their analyses with a null (and alternate) hypothesis which is different from how Bayesians begin by estimating the distribution of probabilities for a set of data. A similarity is that both Bayesians and frequentists allow for new data to change their prior or hypothesis, respectively. Both also have the same subjectivity issues that can come with data collection and management practices.